In [ ]:
%matplotlib inline
import os
import sys
#import pysam
import pandas as pd
#import pybedtools as pybed
import matplotlib as mpl
import matplotlib.pyplot as plt
from __future__ import division
# Libraries just for this problem
import operator
import itertools
In [11]:
def occurrences(string, sub):
    """Count how many times ``sub`` appears in ``string``, overlaps included.

    The search restarts one character after each hit rather than after the
    end of the match, so "AA" is found three times in "AAAA".
    """
    total = 0
    position = string.find(sub)
    while position != -1:
        total += 1
        # Step a single character forward so overlapping matches are seen.
        position = string.find(sub, position + 1)
    return total
In [ ]:
def max_dict_by_value(dictionary):
    """Return the entry of ``dictionary`` that has the largest value.

    Args:
        dictionary: Mapping whose values can be compared with each other.

    Returns:
        A one-item dict ``{key: value}`` holding the entry with the maximum
        value. When several entries tie, the first maximal one in the
        mapping's iteration order is returned.

    Raises:
        ValueError: If ``dictionary`` is empty (raised by ``max``).
    """
    # max() yields one (key, value) tuple. Passing that bare tuple to dict()
    # would try to unpack each element as a pair, so build the dict by hand.
    # .items() is used instead of .iteritems() so this runs on Python 2 and 3.
    key, value = max(dictionary.items(), key=operator.itemgetter(1))
    return {key: value}
In [38]:
def most_frequent_kmer1(seq, kmer_len, num_kmers=1):
    """Return the most frequent k-mers of ``seq`` using a sliding window.

    Every window of ``kmer_len`` characters is counted (overlapping windows
    included) and the distinct k-mers are then ranked by their count.

    Args:
        seq: Sequence to scan.
        kmer_len: Length of the k-mers to count; must be at least 1.
        num_kmers: How many of the top-ranked k-mers to return. Defaults to 1.

    Returns:
        A list of at most ``num_kmers`` k-mers ordered from most to least
        frequent; ties are ordered alphabetically so the result is
        deterministic. The list is empty when ``seq`` is shorter than
        ``kmer_len`` or when ``num_kmers`` is not positive.

    Raises:
        ValueError: If ``kmer_len`` is smaller than 1.
    """
    if kmer_len < 1:
        raise ValueError("kmer_len must be at least 1")

    kmers = dict()
    # Slice the sequence that was passed in, not a notebook-level global.
    for i in range(len(seq) - kmer_len + 1):
        kmer = seq[i:i + kmer_len]
        kmers[kmer] = kmers.get(kmer, 0) + 1

    # Highest count first; the k-mer text breaks ties.
    ranked = sorted(kmers.items(), key=lambda item: (-item[1], item[0]))
    # Clamp to zero: a negative slice bound would drop items from the end.
    return [kmer for kmer, count in ranked[:max(num_kmers, 0)]]
In [38]:
def most_frequent_kmer2(seq, kmer_len, num_kmers=1):
    """Return the most frequent k-mers of ``seq`` by exhaustive enumeration.

    Every possible k-mer over the alphabet "ACTG" is generated and counted in
    ``seq`` with ``occurrences``. The candidate list holds
    ``4 ** kmer_len`` entries, so this approach only suits small ``kmer_len``.

    Args:
        seq: Sequence to scan.
        kmer_len: Length of the k-mers to enumerate; must be at least 1.
        num_kmers: How many of the top-ranked k-mers to return. Defaults to 1.

    Returns:
        A list of at most ``num_kmers`` k-mers that occur in ``seq``, ordered
        from most to least frequent; ties are ordered alphabetically.
        Candidates with a count of zero are never returned.

    Raises:
        ValueError: If ``kmer_len`` is smaller than 1.
    """
    if kmer_len < 1:
        raise ValueError("kmer_len must be at least 1")

    kmers = dict()
    perms = [''.join(p) for p in itertools.product("ACTG", repeat=kmer_len)]
    for perm in perms:
        # Count in the sequence that was passed in, not a notebook-level global.
        count = occurrences(seq, perm)
        if count > 0:
            kmers[perm] = count

    # Highest count first; the k-mer text breaks ties.
    ranked = sorted(kmers.items(), key=lambda item: (-item[1], item[0]))
    # Clamp to zero: a negative slice bound would drop items from the end.
    return [kmer for kmer, count in ranked[:max(num_kmers, 0)]]
In [6]:
# Example sequence and k-mer length used by the exploratory cells below.
string = "ACGTTGCATGTCGCATGATGCATGAGAGCT"
kmer_len = 4

# Count every overlapping window of kmer_len characters. The later cells rank
# and take max() over `kmers`; leaving it empty makes them raise ValueError
# when the notebook is run top to bottom on a fresh kernel.
kmers = dict()
for i in range(len(string) - kmer_len + 1):
    window = string[i:i + kmer_len]
    kmers[window] = kmers.get(window, 0) + 1
In [9]:
# Enumerate every candidate k-mer over the DNA alphabet and count its
# (overlapping) occurrences in the example sequence. itertools.product with
# repeat=kmer_len also yields k-mers that reuse a base (e.g. "ATGA"), which
# permutations of 'ACTG' cannot produce.
perms = [''.join(p) for p in itertools.product('ACTG', repeat=kmer_len)]
dictionary = dict()
for perm in perms:
    dictionary[perm] = occurrences(string, perm)
In [16]:
# Rank (kmer, count) pairs from least to most frequent. .items() is used
# because .iteritems() exists only on Python 2 dictionaries.
sorted_kmers = sorted(kmers.items(), key=operator.itemgetter(1))
Out[16]:
In [40]:
# Ask for the single most frequent k-mer; num_kmers is passed explicitly so
# the call matches the three-parameter signature of most_frequent_kmer1.
most_frequent_kmer1(string, kmer_len, num_kmers=1)
Out[40]:
In [33]:
# Key of `kmers` whose stored count is the largest.
max(kmers, key=kmers.get)
Out[33]:
In [34]:
# Highest count stored for any k-mer.
maximum = max(kmers.values())
# Every k-mer tied for that highest count, in dictionary iteration order.
keys = [kmer for kmer in kmers if kmers[kmer] == maximum]
In [36]:
" ".join(keys)
Out[36]:
In [ ]: